

* Work from the replication folder; every later path (data\...) is relative
* to this directory.
cd D:\Dropbox\book_welfare\replication\

****************************
* create  PULITZER data
****************************

* Pass 1 of 3: prize WINNERS, read from the column author.
* Result: one row per (award row, person), saved in the tempfile winner.
import excel data\pulitzer.xlsx, sheet("Sheet1") firstrow clear
 
	* Award year = 5th "/"-separated piece of the page URL, converted to a
	* number (real() returns missing when the piece is not numeric).
	split StartingPageURL, parse("/")
	gen year= real(StartingPageURL5)
	 
	* Cut the credit text at every "by " and " ("; the 2nd piece is treated
	* as the list of names.
	* NOTE(review): "by " would also cut inside a first name ending in "by"
	* followed by a space - confirm against the raw sheet.
	split author, parse("by " " (")

	rename author2 name 

	* (The interactive "br name category" that used to sit here was removed:
	* it only opened the Data Browser and did not modify the data.)

	* One variable per person: break the list at "," and " and ".
	split name, parse("," " and ")
	* name? matches name plus exactly one character (name1, name2, ...), so
	* the unsplit variable name is dropped here and reshape can re-create it.
	* NOTE(review): a row with ten or more people would lose name10 onward.
	keep name? category year

	* Row number of the award entry; makes the reshape key unique.
	gen id=_n

	reshape long name, i( category year id) j(num)
	drop if name==""
	* Presumably the suffix left over when "X, Jr." is cut at the comma.
	drop if name==" Jr."
	gen type="winner"

	tempfile winner
save `winner'


* Pass 2 of 3: FINALISTS listed in the column fina1.
* Same recipe as the other passes: year from the URL, names cut out of the
* credit text, one row per person. Saved in the tempfile final.
import excel data\pulitzer.xlsx, sheet("Sheet1") firstrow clear

	* Award year: the fifth "/"-delimited piece of the page URL, as a number.
	split StartingPageURL, parse("/") generate(url_piece)
	generate year = real(url_piece5)

	* The second piece after cutting at "by " / " (" is the list of names.
	split fina1, parse("by " " (") generate(fina1_piece)
	rename fina1_piece2 name

	* Separate the individual people (delimiters: "," and " and ").
	split name, parse("," " and ")

	* id is the source row number, so (year id) identifies one award entry;
	* category is not part of the key and is simply carried along.
	generate id = _n
	keep id year category name?

	reshape long name, i(year id) j(num)

	* Discard unused name slots and the stray " Jr." pieces.
	drop if inlist(name, "", " Jr.")
	generate type = "finalist"

	tempfile final
save `final'



* Pass 3 of 3: FINALISTS listed in the column fin2, followed by stacking
* the three passes into a single person-level dataset.
import excel data\pulitzer.xlsx, sheet("Sheet1") firstrow clear

	* Award year: the fifth "/"-delimited piece of the page URL, as a number.
	split StartingPageURL, parse("/") generate(url_piece)
	generate year = real(url_piece5)

	* The second piece after cutting at "by " / " (" is the list of names.
	split fin2, parse("by " " (") generate(fin2_piece)
	rename fin2_piece2 name

	* Separate the individual people (delimiters: "," and " and ").
	split name, parse("," " and ")

	* id is the source row number, so (year id) identifies one award entry.
	generate id = _n
	keep id year category name?

	reshape long name, i(year id) j(num)

	* Discard unused name slots and the stray " Jr." pieces.
	drop if inlist(name, "", " Jr.")
	* Remove every occurrence of "the late " from the name (this pass only).
	replace name = subinstr(name, "the late ", "", .)
	generate type = "finalist"

* Stack the fina1 finalists first, then the winners, under the fin2 rows.
append using `final' `winner'


	* Attach a gender share to each person through the first name.
	* The parsed name becomes fullname; the merge key name is rebuilt below.
	rename name fullname
	split fullname, parse(" ")

	* Merge key: first space-delimited piece of the name, in upper case
	* (presumably the dictionary stores its keys in upper case - confirm).
	generate name = upper(fullname1)
	drop fullname1

	* Many people can share one first name (m:1). Dictionary entries that
	* match nobody are not kept, and no _merge variable is left behind.
	merge m:1 name using data\name_gender_wipo.dta, keep(master match) nogenerate

	* Counters that are summed in the final collapse:
	*   x        1 for every person
	*   dname    1 when the first-name key is non-empty
	*   dmatch   1 when the dictionary supplied mshare
	generate x = 1
	generate dname = (name != "")
	generate dmatch = (mshare != .)
	* Complement of mshare; stays missing when mshare is missing.
	generate femshare = 1 - mshare
	
 * Diagnostic only - the data in memory are restored unchanged afterwards.
 * Shows the full names that still have no gender share, most frequent first.
 preserve
	* One row per distinct fullname among the unmatched, with its count n.
	contract fullname if femshare == ., freq(n)
	gsort -n
	* Interactive: opens the Data Browser on the tabulation.
	browse fullname n
 restore

* Fill in genders that were coded by hand for names the dictionary missed.
* The hand-coded sheet is turned into a small lookup (fullname -> xfemshare)
* inside preserve/restore so the person-level data in memory are untouched.
preserve 
	import excel data\hand_match_name_gender.xlsx, sheet("pulitzer") firstrow clear
	keep fullname male 
	* xfemshare: 1 when male==0, 0 when male==1, missing for any other value.
	gen xfemshare=1 if male==0 
	replace xfemshare=0 if male==1 
	keep fullname xfemshare
	tempfile fem 
	save `fem'
restore 
* m:1 requires fullname to be unique in the hand-coded sheet.
merge m:1 fullname using `fem'
 
* A hand-coded value takes precedence over the dictionary-based share.
replace femshare = xfemshare if xfemshare~=. 
* Fix: dmatch was set before this merge, so hand-matched people were counted
* in Nfemale (through femshare) but not in Nmatch. Flag them as matched so
* that the two totals refer to the same set of people.
* NOTE(review): this changes Nmatch in the saved file - confirm that
* downstream code does not rely on a dictionary-only match count.
replace dmatch = 1 if xfemshare~=. 
 drop xfemshare 
 * Drop hand-coded names that do not occur in the Pulitzer data.
 drop if _merge ==2 
 drop _merge 
	
	
* Aggregate to one row per award year and prize category:
*   N        sum of x        (number of people)
*   Nmatch   sum of dmatch   (people flagged as matched)
*   Nfemale  sum of femshare (missing values are skipped by the sum)
* The winner/finalist indicator type is not retained.
collapse (sum) N=x          ///
               Nmatch=dmatch ///
               Nfemale=femshare, ///
	by(year category)

* Write the result, overwriting any existing file.
save data\pulitzer.dta, replace
	